# JUDICIAL PROFESSIONAL BACKGROUND AND PRETRIAL DETENTION OUTCOMES
# Oded Oren and Chad Topaz
# Robustness checks — Appendix Tables A1–A6
library("this.path")
source(paste0(dirname(this.path()), "/functions.R"))

# Data Loading ---------------------------------------------------------------
# Every input is an .RDS file under data/ in the folder that holds this script.
script_dir <- dirname(this.path())

judgeinfo             <- readRDS(paste0(script_dir, "/data/judgeinfo.RDS"))
detentiondata         <- readRDS(paste0(script_dir, "/data/detentiondata.RDS"))
baildata              <- readRDS(paste0(script_dir, "/data/baildata.RDS"))
excludeddata_detained <- readRDS(paste0(script_dir, "/data/excludeddata_detained.RDS"))
excludeddata_bail     <- readRDS(paste0(script_dir, "/data/excludeddata_bail.RDS"))

# Detention (Table A5)-----
## Main model ---------------------------------------------------------------
# Regressors are all columns of detentiondata apart from the outcome
# (detained), the clustering variable (judge), and the law_enforcement and
# legal_services columns.
detention_non_regressors <- c("detained", "judge", "law_enforcement", "legal_services")
vars_detention <- setdiff(names(detentiondata), detention_non_regressors)

# feols fit of detained on those regressors, clustered by judge.
detention_rhs <- paste(vars_detention, collapse = " + ")
model_detention <- feols(
  as.formula(paste("detained ~", detention_rhs)),
  data    = detentiondata,
  cluster = ~ judge
)

## Logit (robustness) ------------------------------------------------------
# Logit version of the detention model: same regressors (vars_detention) and
# the same clustering by judge as the linear fit.
model_logit <- feglm(
  fml     = as.formula(paste("detained ~", paste(vars_detention, collapse = " + "))),
  data    = detentiondata,
  family  = binomial("logit"),
  cluster = ~ judge
)

# Two counterfactual copies of the data in which every row is assigned the
# same experience level; the original factor levels are kept so that
# predict() sees the coding used at estimation time.
lev <- levels(detentiondata$experience)
d1 <- detentiondata
d1$experience <- factor("Law Enforcement", levels = lev)
d0 <- detentiondata
d0$experience <- factor("None", levels = lev)

# Predicted probabilities (response scale) under each counterfactual.
p1 <- predict(model_logit, newdata = d1, type = "response")
p0 <- predict(model_logit, newdata = d0, type = "response")

# Mean over rows of the "Law Enforcement" minus "None" gap in predicted
# probability.
AME_det <- mean(p1 - p0)

# Printed explicitly: a bare top-level expression is not echoed when this
# file is run through source() with its default settings.
print(AME_det)

# Extensive (Table A5)------
## Main model ---------------------------------------------------------------
# Outcome: bail_set is TRUE when cash.bail is strictly positive.
data_extensive <- mutate(baildata, bail_set = cash.bail > 0)

# Regressors are every remaining column except the outcome, the raw bail
# amount, the clustering variable, and law_enforcement / legal_services.
extensive_non_regressors <- c(
  "bail_set", "cash.bail", "judge", "law_enforcement", "legal_services"
)
vars_extensive <- setdiff(names(data_extensive), extensive_non_regressors)

# feols fit of bail_set on those regressors, clustered by judge.
extensive_rhs <- paste(vars_extensive, collapse = " + ")
model_extensive <- feols(
  as.formula(paste("bail_set ~", extensive_rhs)),
  data    = data_extensive,
  cluster = ~ judge
)

## Logit (robustness) ------------------------------------------------------
# Logit version of the bail_set model: same regressors (vars_extensive) and
# the same clustering by judge as the linear fit.
model_logit_int <- feglm(
  fml     = as.formula(paste("bail_set ~", paste(vars_extensive, collapse = " + "))),
  data    = data_extensive,
  family  = binomial("logit"),
  cluster = ~ judge
)

# Counterfactual copies of the data with experience fixed at a single level
# for every row; the original factor levels are reused so predict() sees the
# coding used at estimation time.
d1 <- data_extensive
d0 <- data_extensive
d1$experience <- factor("Law Enforcement", levels = levels(data_extensive$experience))
d0$experience <- factor("None", levels = levels(data_extensive$experience))

# Predicted probabilities (response scale) under each counterfactual.
p1 <- predict(model_logit_int, newdata = d1, type = "response")
p0 <- predict(model_logit_int, newdata = d0, type = "response")

# Mean over rows of the "Law Enforcement" minus "None" gap in predicted
# probability.
AME <- mean(p1 - p0)

# Printed explicitly: a bare top-level expression is not echoed when this
# file is run through source() with its default settings.
print(AME)

# Intensive (Table A6)------
## Main model -------------------------------------------------------------
# Keep only rows with strictly positive cash bail, then take its natural log
# as the outcome.
positive_bail  <- filter(baildata, cash.bail > 0)
data_intensive <- mutate(positive_bail, log_bail = log(cash.bail))

# lm of log_bail on every other column ("."), with the id column, the raw
# bail amount and the law_enforcement / legal_services columns removed from
# the right-hand side.
model_intensive <- lm(
  log_bail ~ . - judge - cash.bail - law_enforcement - legal_services - log_bail,
  data = data_intensive
)

# Covariance matrix clustered by judge (type "HC1").
vcov_intensive <- vcovCL(
  model_intensive,
  data    = data_intensive,
  cluster = ~judge,
  type    = "HC1"
)

# Coefficient table computed with the clustered covariance.
coeftest_intensive <- coeftest(model_intensive, vcov = vcov_intensive)
print(coeftest_intensive)

## Alternative: log(cash.bail + 1e-6) ------------------------------------
# All rows of baildata are kept here; the small constant 1e-6 is added before
# taking the log so that rows with cash.bail == 0 have a finite outcome.
data_logplus <- baildata %>%
  mutate(log_bail = log(cash.bail + 1e-6))

# Same right-hand side as the main intensive-margin lm: every other column,
# minus the id column, the raw bail amount and law_enforcement /
# legal_services.
model_logplus <- lm(
  log_bail ~ . - judge - cash.bail - law_enforcement - legal_services - log_bail,
  data = data_logplus
)

# Covariance clustered by judge (type "HC1"). The cluster is given as a
# formula, like the other clustered fits in this script, so it is looked up
# through the fitted model rather than matched by position against the full
# data frame.
vcov_logplus <- vcovCL(
  model_logplus,
  cluster = ~ judge,
  type    = "HC1"
)

print(coeftest(model_logplus, vcov = vcov_logplus))

## Alternative: Gamma GLM (log link) -------------------------------------
# Regressors for the Gamma model: every column of data_intensive except the
# outcome-type columns and the ids listed below.
rhs_vars <- setdiff(
  names(data_intensive),
  c("cash.bail", "bail_set", "log_bail", "judge",
    "law_enforcement", "legal_services")   # outcomes & IDs
)

# Gamma GLM with a log link on the raw bail amount, fitted on data_intensive
# (rows with cash.bail > 0) and clustered by judge.
model_step2_gamma <- feglm(
  as.formula(paste("cash.bail ~", paste(rhs_vars, collapse = " + "))),
  data    = data_intensive,
  family  = Gamma(link = "log"),
  cluster = ~ judge
)

# Printed explicitly: a bare top-level summary() is not echoed when this file
# is run through source() with its default settings.
print(summary(model_step2_gamma))

# Exclusions (Table A1)-----
### Detention ---------------------------------------------------------------
# Regressors: every column of excludeddata_detained except the outcome, the
# clustering variable, and the columns listed below, which are kept out of
# the right-hand side.
vars_detention_excluded <- setdiff(
  names(excludeddata_detained),
  c("detained", "judge", "law_enforcement", "legal_services", "disposed_case", "cash.bail",
    "unclear", "detained_not_bail_eligible", "placeholder_bail", "excluded"))

# feols fit of detained on those regressors, clustered by judge.
model_detention_excluded <- feols(
  as.formula(paste("detained ~", paste(vars_detention_excluded, collapse = " + "))),
  data = excludeddata_detained,
  cluster = ~ judge
)

# Printed explicitly: a bare top-level summary() is not echoed when this file
# is run through source() with its default settings.
print(summary(model_detention_excluded))

### Bail: Extensive ---------------------------------------------------------
# Outcome: bail_set is TRUE only when cash.bail is at least 100. The 50+
# judges variant of this model in this script carries the note that amounts
# 1-99 are administrative bail amounts, which is why the cutoff is not 0.
data_extensive_excluded <- excludeddata_bail %>%
  mutate(bail_set = cash.bail >= 100)

# Regressors: every remaining column except the outcome-type columns, the
# exclusion flag, and the ids listed below.
vars_extensive_excluded <- setdiff(
  names(data_extensive_excluded),
  c(
    "excluded",
    "detained",
    "disposed_case",
    "bail_set",
    "cash.bail",
    "judge",
    "law_enforcement",
    "legal_services")
)

# feols fit of bail_set on those regressors, clustered by judge.
model_extensive_excluded <- feols(
  as.formula(paste("bail_set ~", paste(vars_extensive_excluded, collapse = " + "))),
  data    = data_extensive_excluded,
  cluster = ~ judge
)

# Printed explicitly: a bare top-level summary() is not echoed when this file
# is run through source() with its default settings.
print(summary(model_extensive_excluded))

### Detention (Judges with 50+ cases) --------------------------------------
# Keep only judges that appear in at least 50 rows of excludeddata_detained.
excludeddata_50 <- excludeddata_detained %>%
  group_by(judge) %>%
  filter(n() >= 50) %>%
  ungroup()

# Same regressor rule as the full-sample exclusions model: all columns except
# the outcome, the clustering variable, and the columns listed below.
vars_detention_excluded_50 <- setdiff(
  names(excludeddata_50),
  c("detained", "judge", "law_enforcement", "legal_services", "disposed_case", "cash.bail",
    "unclear", "detained_not_bail_eligible", "placeholder_bail", "excluded"))

# feols fit of detained on those regressors, clustered by judge.
model_detention_excluded_50 <- feols(
  as.formula(paste("detained ~", paste(vars_detention_excluded_50, collapse = " + "))),
  data = excludeddata_50,
  cluster = ~ judge
)

# Printed explicitly: a bare top-level summary() is not echoed when this file
# is run through source() with its default settings.
print(summary(model_detention_excluded_50))

### Bail: Extensive (Judges with 50+ cases) --------------------------------
# Build the bail_set outcome, then keep only judges that appear in at least
# 50 rows of excludeddata_bail.
data_extensive_excluded_50 <- excludeddata_bail %>%
  mutate(bail_set = cash.bail >= 100) %>% # excludes admin bail amounts, 1-99
  group_by(judge) %>%
  filter(n() >= 50) %>%
  ungroup()

# Regressors: every remaining column except the outcome-type columns, the
# exclusion flag, and the ids listed below.
vars_extensive_excluded_50 <- setdiff(
  names(data_extensive_excluded_50),
  c(
    "excluded",
    "detained",
    "disposed_case",
    "bail_set",
    "cash.bail",
    "judge",
    "law_enforcement",
    "legal_services")
)

# feols fit of bail_set on those regressors, clustered by judge.
model_extensive_excluded_50 <- feols(
  as.formula(paste("bail_set ~", paste(vars_extensive_excluded_50, collapse = " + "))),
  data    = data_extensive_excluded_50,
  cluster = ~ judge
)

# Printed explicitly: a bare top-level summary() is not echoed when this file
# is run through source() with its default settings.
print(summary(model_extensive_excluded_50))

## Wald Test on Experience -----------------------------------------------------
# Regress the `excluded` flag on case covariates and the experience
# indicators (reference level "None"), with county x arraigndate x arresttype
# fixed effects and clustering by judge.
# fml and data are passed by name: piping the data frame into feols() would
# place it in the first parameter (fml) and record `data = .` in the call.
wald_excluded <- feols(
  fml = excluded ~ age + gender_defendant + topseverity + supervision + chargecategory + offenselevel +
    misd.convictions + nvfo.convictions + vfo.convictions +
    misd.pending + nvfo.pending + vfo.pending +
    i(experience, ref = "None") | county^arraigndate^arresttype,
  data    = excludeddata_detained,
  cluster = ~ judge
)

# Joint Wald test restricted to coefficients whose names start with
# "experience::".
wald_excluded_result <- wald(wald_excluded, keep = "^experience::")


# Post Bail Reform (2022-2023) (Table A4)------
## Detention ---------------------------------------------------------------
# Restrict to rows arraigned on or after 2022-01-01. The cutoff is built as a
# Date so both sides of the comparison have the same class.
d_2022_2023 <- detentiondata %>%
  filter(as.Date(arraigndate) >= as.Date("2022-01-01"))

# Regressors: all columns of detentiondata except the outcome, the clustering
# variable, and the law_enforcement / legal_services columns.
vars_detention <- setdiff(names(detentiondata), c("detained", "judge", "law_enforcement", "legal_services"))

# feols fit of detained on those regressors for the restricted sample,
# clustered by judge.
model_2022_2023 <- feols(
  as.formula(paste("detained ~", paste(vars_detention, collapse = " + "))),
  data = d_2022_2023,
  cluster = ~ judge)

# Printed explicitly: a bare top-level summary() is not echoed when this file
# is run through source() with its default settings.
print(summary(model_2022_2023))

## Extensive ---------------------------------------------------------------
# Outcome: bail_set is TRUE when cash.bail is strictly positive.
data_extensive <- mutate(baildata, bail_set = cash.bail > 0)

# Regressors: every remaining column except the outcome, the raw bail amount,
# the clustering variable, and law_enforcement / legal_services.
extensive_post_non_regressors <- c(
  "bail_set", "cash.bail", "judge", "law_enforcement", "legal_services"
)
vars_extensive <- setdiff(names(data_extensive), extensive_post_non_regressors)

# Restrict to rows arraigned on or after 2022-01-01.
b_2022_2023 <- filter(data_extensive, as.Date(arraigndate) >= "2022-01-01")

# feols fit of bail_set on those regressors for the restricted sample,
# clustered by judge.
extensive_post_rhs <- paste(vars_extensive, collapse = " + ")
model_extensive_2022_2023 <- feols(
  as.formula(paste("bail_set ~", extensive_post_rhs)),
  data    = b_2022_2023,
  cluster = ~ judge
)

summary_extensive_2022_2023 <- summary(model_extensive_2022_2023)
print(summary_extensive_2022_2023)

## Intensive ---------------------------------------------------------------
# Rows with strictly positive cash bail, with the natural log of the amount
# as the outcome.
positive_bail_rows <- filter(baildata, cash.bail > 0)
data_intensive <- mutate(positive_bail_rows, log_bail = log(cash.bail))

# Regressors: every remaining column except the outcome, the raw bail amount,
# the clustering variable, and law_enforcement / legal_services.
intensive_non_regressors <- c(
  "log_bail", "cash.bail", "judge", "law_enforcement", "legal_services"
)
vars_intensive <- setdiff(names(data_intensive), intensive_non_regressors)

# Restrict to rows arraigned on or after 2022-01-01.
i_2022_2023 <- filter(data_intensive, as.Date(arraigndate) >= "2022-01-01")

intensive_rhs  <- paste(vars_intensive, collapse = " + ")
form_intensive <- as.formula(paste("log_bail ~", intensive_rhs))

# lm fit on the restricted sample, with a covariance matrix clustered by
# judge (type "HC1").
model_intensive_2022_2023 <- lm(form_intensive, data = i_2022_2023)
vcov_intensive_2022_2023 <- vcovCL(
  model_intensive_2022_2023,
  data    = i_2022_2023,
  cluster = ~ judge,
  type    = "HC1"
)

# Coefficient table computed with the clustered covariance.
coeftest_intensive_2022_2023 <- coeftest(
  model_intensive_2022_2023,
  vcov = vcov_intensive_2022_2023
)
print(coeftest_intensive_2022_2023)

# Generate Tables -----
# The table_A* helpers are not defined in this file (presumably they come
# from functions.R, sourced at the top -- confirm there).

# Table A1: the two exclusions-sample models, then their 50+ judges variants.
table_A1(
  model_detention_excluded,
  model_extensive_excluded,
  model_detention_excluded_50,
  model_extensive_excluded_50
)

# Table A5: linear and logit fits for detention, then for bail_set.
table_A5(
  model_detention,
  model_logit,
  model_extensive,
  model_logit_int
)

# Table A6: main log-bail lm with its clustered covariance, the Gamma GLM,
# and the log(cash.bail + 1e-6) lm with its clustered covariance.
table_A6(
  model_intensive,
  vcov_intensive,
  model_step2_gamma,
  model_logplus,
  vcov_logplus
)

# Table A4: the three models fitted on rows arraigned from 2022-01-01 on,
# plus the clustered covariance for the lm.
table_A4(
  model_2022_2023,
  model_extensive_2022_2023,
  model_intensive_2022_2023,
  vcov_intensive_2022_2023
)
